library(tidyverse)
## Warning in system("timedatectl", intern = TRUE): running command 'timedatectl'
## had status 1
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.4 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.4 ✓ stringr 1.4.0
## ✓ readr 2.0.1 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
all_counts <- readRDS(file= "Data/pc_count_matrix.rds")
avg_counts <- readRDS(file="Data/avg_pc_count_matrix.rds")
diff_counts <- readRDS(file="Data/diff_pc_count_matrix.rds")
meta <- suppressMessages(readr::read_tsv("Data/complete_meta_data.tsv"))
meta <- meta %>% mutate(tissue_type=replace(tissue_type, tissue_type=="embryonic facial prominence", "facial prominence"))
meta <- meta %>% mutate(tissue_type=replace(tissue_type, tissue_type=="skeletal muscle tissue", "muscle tissue"))
meta <- meta %>% mutate(tissue_type=replace(tissue_type, tissue_type=="skeletal muscle tissue", "muscle tissue"))
tissue_types <- unique(meta %>% pull(tissue_type))
collapsed_dev_counts <- matrix(data=NA, nrow=nrow(all_counts),ncol=length(tissue_types))
collapsed_dev_counts_all <- matrix(data=NA, nrow=nrow(all_counts),ncol=length(tissue_types))
walker <- 1
for (tissue in tissue_types){
tissue_subset_ids <- meta %>% filter(tissue_type == tissue) %>% pull(id)
tissue_subset_count <- all_counts[,tissue_subset_ids]
tissue_avg <- rowMeans(tissue_subset_count)
collapsed_dev_counts[,walker] <- tissue_avg
walker <- walker + 1
}
walker <- 1
for (tissue in tissue_types){
tissue_subset_ids <- meta %>% filter(tissue_type == tissue) %>% pull(id)
tissue_subset_count <- all_counts
tissue_avg <- rowMeans(tissue_subset_count)
collapsed_dev_counts_all[,walker] <- tissue_avg
walker <- walker + 1
}
colnames(collapsed_dev_counts) <- tissue_types
rownames(collapsed_dev_counts) <- rownames(all_counts)
colnames(collapsed_dev_counts_all) <- tissue_types
TFs<- c("Ascl1", "Hes1", "Neurod1", "Mecp2", "Mef2c", "Runx1", "Tcf4", "Pax6")
Investigate the most highly expressed gene (sum of all stages) for each tissue type
maxes_list <- list()
for (tissue in tissue_types){
tissue_subset_ids <- meta %>% filter(tissue_type == tissue) %>% pull(id)
all_sums <- apply(all_counts[,tissue_subset_ids],1, sum)
maxes_list[tissue] <- names(all_sums)[which.max(all_sums)]
}
for (name in names(maxes_list)){
print(paste0("The most highly expressed protein in ", name, " : ", maxes_list[name]))
}
## [1] "The most highly expressed protein in forebrain : Tuba1a"
## [1] "The most highly expressed protein in midbrain : Tuba1a"
## [1] "The most highly expressed protein in hindbrain : Tuba1a"
## [1] "The most highly expressed protein in facial prominence : Hba-a2"
## [1] "The most highly expressed protein in heart : Hba-a2"
## [1] "The most highly expressed protein in limb : Hba-a2"
## [1] "The most highly expressed protein in liver : Hba-a2"
## [1] "The most highly expressed protein in neural tube : Tuba1a"
## [1] "The most highly expressed protein in lung : Sftpc"
## [1] "The most highly expressed protein in stomach : Cym"
## [1] "The most highly expressed protein in intestine : mt-Atp6"
## [1] "The most highly expressed protein in kidney : mt-Atp6"
## [1] "The most highly expressed protein in thymus : Actb"
## [1] "The most highly expressed protein in muscle tissue : Acta1"
## [1] "The most highly expressed protein in spleen : Hba-a2"
## [1] "The most highly expressed protein in urinary bladder : Actg2"
## [1] "The most highly expressed protein in adrenal gland : mt-Atp6"
Same thing as above but excluding the mitochondria
maxes_list <- list()
for (tissue in tissue_types){
tissue_subset_ids <- meta %>% filter(tissue_type == tissue) %>% pull(id)
all_sums <- apply(all_counts[,tissue_subset_ids],1, sum)
all_sums <- all_sums[!sapply(names(all_sums), startsWith, "mt-")]
maxes_list[tissue] <- names(all_sums)[which.max(all_sums)]
}
for (name in names(maxes_list)){
print(paste0("The most highly expressed protein in ", name, " : ", maxes_list[name]))
}
## [1] "The most highly expressed protein in forebrain : Tuba1a"
## [1] "The most highly expressed protein in midbrain : Tuba1a"
## [1] "The most highly expressed protein in hindbrain : Tuba1a"
## [1] "The most highly expressed protein in facial prominence : Hba-a2"
## [1] "The most highly expressed protein in heart : Hba-a2"
## [1] "The most highly expressed protein in limb : Hba-a2"
## [1] "The most highly expressed protein in liver : Hba-a2"
## [1] "The most highly expressed protein in neural tube : Tuba1a"
## [1] "The most highly expressed protein in lung : Sftpc"
## [1] "The most highly expressed protein in stomach : Cym"
## [1] "The most highly expressed protein in intestine : Apoa1"
## [1] "The most highly expressed protein in kidney : Hba-a2"
## [1] "The most highly expressed protein in thymus : Actb"
## [1] "The most highly expressed protein in muscle tissue : Acta1"
## [1] "The most highly expressed protein in spleen : Hba-a2"
## [1] "The most highly expressed protein in urinary bladder : Actg2"
## [1] "The most highly expressed protein in adrenal gland : Hba-a2"
Top 5 Universally highly expressed protein across all tissues regardless of stages (without processing)
##Collapse dev_stages
collapsed_matrices <- matrix(data=NA, nrow = nrow(all_counts), ncol = length(tissue_types))
rownames(collapsed_matrices) <- rownames(all_counts)
walker <- 1
for (tissue in tissue_types){
tissue_subset_ids <- meta %>% filter(tissue_type == tissue) %>% pull(id)
all_avgs <- apply(all_counts[,tissue_subset_ids],1, mean)
collapsed_matrices[,walker] <- all_avgs
walker <- walker + 1
}
all_sums <- apply(collapsed_matrices,1, sum)
for (i in 1:10){
print(paste0("The highests expressed protein acrossed all tissue in order ",i ,": ", names(sort(all_sums,decreasing=TRUE))[i]))
}
## [1] "The highests expressed protein acrossed all tissue in order 1: Hba-a2"
## [1] "The highests expressed protein acrossed all tissue in order 2: Hbb-bs"
## [1] "The highests expressed protein acrossed all tissue in order 3: mt-Atp6"
## [1] "The highests expressed protein acrossed all tissue in order 4: Hba-a1"
## [1] "The highests expressed protein acrossed all tissue in order 5: Hbb-bt"
## [1] "The highests expressed protein acrossed all tissue in order 6: mt-Atp8"
## [1] "The highests expressed protein acrossed all tissue in order 7: Actb"
## [1] "The highests expressed protein acrossed all tissue in order 8: Ftl1"
## [1] "The highests expressed protein acrossed all tissue in order 9: Ppia"
## [1] "The highests expressed protein acrossed all tissue in order 10: Acta1"
Expression profile of the highest expressed protein across all tissues Hba-a2 Extract only Hba-a2 information
library(ggplot2)
target_protein <- "Hba-a2"
unique_dev_stages <- unique(meta %>% pull(dev_stage))
Hba_matrix <- matrix(data=NA, nrow=length(tissue_types), ncol=3)
colnames(Hba_matrix) <- c("count", "dev_stage", "tissue_type")
Hba_frame <- data.frame(Hba_matrix)
walker <- 1
for (exp_id in names(avg_counts[target_protein,])){
Hba_frame[walker, "count"] <- avg_counts[target_protein, exp_id]
Hba_frame[walker, "dev_stage"] <- meta %>% filter(id==exp_id) %>% pull(dev_stage)
Hba_frame[walker, "tissue_type"] <- meta %>% filter(id==exp_id) %>% pull(tissue_type)
walker <- walker + 1
}
Hba_frame$count <- log2(Hba_frame$count)
Plot the extracted information
library(ggrepel)
make_label <- function(row){
if (row['dev_stage'] == max(Hba_frame %>% filter(tissue_type==row['tissue_type']) %>% pull(dev_stage))){
#print(max(Hba_frame %>% filter(tissue_type==tissue) %>% pull(dev_stage)))\
return(as.character(row['tissue_type']))
}else {
return(NA_character_)
}
}
Hba_frame[,"label"] <- Hba_frame %>% apply(1,make_label)
# Hba_frame <- Hba_frame %>%
# mutate(label = if_else(dev_stage == max(Hba_frame %>% filter()), as.character(tissue_type), NA_character_))
ggplot(Hba_frame, aes(x=dev_stage, y=count, group=tissue_type, colour=tissue_type)) + geom_line() + geom_point() + expand_limits(x= c(4, 10)) +
# geom_text_repel(
# aes(label = gsub("^.*$", " ", label)),
# # This will force the correct position of the link's right end.
# segment.curvature = -0.1,
# segment.square = TRUE,
# segment.color = 'grey',
# box.padding = 0.1,
# point.padding = 0.6,
# nudge_x = 1,
# nudge_y = 1,
# force = 15,
# hjust = 0,
# direction = "y",
# na.rm = TRUE,
# xlim = c(5, 10),
# ylim = c(0, 150000),
# ) +
geom_text_repel(data = . %>% filter(!is.na(label)),
aes(label = paste0(" ", label)),
#segment.alpha = 0, ## This will 'hide' the link
segment.curvature = -0.1,
segment.square = TRUE,
# segment.color = 'grey',
box.padding = 0.1,
point.padding = 0.6,
nudge_x = 1,
nudge_y = 1,
force = 0.5,
hjust = 0,
direction="y",
na.rm = TRUE,
xlim = c(5, 10),
ylim = c(10,17),
) +
ylab("counts(log2)") + ggtitle("Expression pattern of Hba-a2 across tissues") + theme(plot.title = element_text(hjust = 0.5))
Top 5 Universally highly expressed protein across all tissues regardless of stages (by percentage in tissues)
##Collapse dev_stages
collapsed_matrices <- matrix(data=NA, nrow = nrow(all_counts), ncol = length(tissue_types))
rownames(collapsed_matrices) <- rownames(all_counts)
walker <- 1
for (tissue in tissue_types){
tissue_subset_ids <- meta %>% filter(tissue_type == tissue) %>% pull(id)
all_avgs <- apply(all_counts[,tissue_subset_ids],1, mean)
percent_counts <- all_avgs/sum(all_avgs)
collapsed_matrices[,walker] <- percent_counts
walker <- walker + 1
}
all_sums <- apply(collapsed_matrices,1, sum)
for (i in 1:10){
print(paste0("The highests expressed protein acrossed all tissue in order ",i ,": ", names(sort(all_sums,decreasing=TRUE))[i]))
}
## [1] "The highests expressed protein acrossed all tissue in order 1: Hba-a2"
## [1] "The highests expressed protein acrossed all tissue in order 2: Hbb-bs"
## [1] "The highests expressed protein acrossed all tissue in order 3: mt-Atp6"
## [1] "The highests expressed protein acrossed all tissue in order 4: Hba-a1"
## [1] "The highests expressed protein acrossed all tissue in order 5: mt-Atp8"
## [1] "The highests expressed protein acrossed all tissue in order 6: Hbb-bt"
## [1] "The highests expressed protein acrossed all tissue in order 7: Actb"
## [1] "The highests expressed protein acrossed all tissue in order 8: Ftl1"
## [1] "The highests expressed protein acrossed all tissue in order 9: Ppia"
## [1] "The highests expressed protein acrossed all tissue in order 10: Acta1"
Top 5 Universally highly expressed protein across all tissues regardless of stages (by ranking)
##Collapse dev_stages
collapsed_matrices <- matrix(data=NA, nrow = nrow(all_counts), ncol = length(tissue_types))
rownames(collapsed_matrices) <- rownames(all_counts)
walker <- 1
for (tissue in tissue_types){
tissue_subset_ids <- meta %>% filter(tissue_type == tissue) %>% pull(id)
all_avgs <- apply(all_counts[,tissue_subset_ids],1, mean)
all_ranking <- rank(all_avgs)
collapsed_matrices[,walker] <- all_ranking
walker <- walker + 1
}
all_sums <- apply(collapsed_matrices,1, sum)
for (i in 1:10){
print(paste0("The highests expressed protein acrossed all tissue in order ",i ,": ", names(sort(all_sums, decreasing = TRUE))[i]))
}
## [1] "The highests expressed protein acrossed all tissue in order 1: mt-Atp6"
## [1] "The highests expressed protein acrossed all tissue in order 2: mt-Atp8"
## [1] "The highests expressed protein acrossed all tissue in order 3: Hba-a2"
## [1] "The highests expressed protein acrossed all tissue in order 4: Actb"
## [1] "The highests expressed protein acrossed all tissue in order 5: Ppia"
## [1] "The highests expressed protein acrossed all tissue in order 6: Ftl1"
## [1] "The highests expressed protein acrossed all tissue in order 7: mt-Nd1"
## [1] "The highests expressed protein acrossed all tissue in order 8: Eef1a1"
## [1] "The highests expressed protein acrossed all tissue in order 9: mt-Cytb"
## [1] "The highests expressed protein acrossed all tissue in order 10: mt-Co1"
expression distribution acrossed different tissues (box plot)
library(cowplot)
all_counts_melted <- data.frame(all_counts) %>% gather("id", "count")
all_counts_melted <- all_counts_melted %>% inner_join((meta %>% select(id, tissue_type, dev_stage)), by="id")
ggplot(data=all_counts_melted, aes(x=dev_stage, y=log2(count+1))) + geom_boxplot() + facet_wrap(~ tissue_type, ncol=3) + theme_cowplot(12) + geom_hline(yintercept=median(log2(all_counts+1)), colour="red")
expression distribution acrossed different tissues (histogram)
all_counts_melted <- data.frame(all_counts) %>% gather("id", "count")
all_counts_melted <- all_counts_melted %>% inner_join((meta %>% select(id, tissue_type, dev_stage)), by="id")
suppressWarnings(ggplot(data=all_counts_melted, aes(x=log2(count+1), fill=dev_stage)) + geom_histogram(binwidth = 0.5) + facet_wrap(~ tissue_type, ncol=3) + theme_cowplot(12))
expression distribution acrossed different tissues (histogram log transformed)
all_counts_melted <- data.frame(all_counts) %>% gather("id", "count")
all_counts_melted <- all_counts_melted %>% inner_join((meta %>% select(id, tissue_type, dev_stage)), by="id")
suppressWarnings(ggplot(data=all_counts_melted, aes(x=log2(count+1), y=..density.. ,fill=dev_stage)) + geom_histogram(binwidth = 0.5) + facet_wrap(~ tissue_type, ncol=3) + theme_cowplot(12))
expression distribution acrossed different tissues (box plot collapsed dev_stages)
melted_collapsed_dev <- pivot_longer(data.frame(collapsed_dev_counts), cols=(everything()), names_to = "tissue_type", values_to = "count")
ggplot(data=melted_collapsed_dev, aes(y=log2(count+1), x=tissue_type)) + geom_boxplot() + theme_cowplot(12) + xlab("tissue types") + geom_hline(yintercept=median(log2(all_counts+1)), colour="red") + theme(text = element_text(size=20), axis.text.x = element_text(angle=45, hjust=1, size=15))
expression distribution acrossed different tissues (box plot collapsed dev_stages not limited to pc genes)
melted_collapsed_dev_all <- pivot_longer(data.frame(collapsed_dev_counts_all), cols=(everything()), names_to = "tissue_type", values_to = "count")
ggplot(data=melted_collapsed_dev_all, aes(y=log10(count+1), x=tissue_type)) + geom_boxplot() + theme_cowplot(12) + xlab("tissue types") + geom_hline(yintercept=median(log2(all_counts+1)), colour="red") + theme(text = element_text(size=20), axis.text.x = element_text(angle=45, hjust=1, size=15))
expression distribution acrossed different tissues (histogram collapsed by dev_stage)
suppressWarnings(ggplot(data=melted_collapsed_dev, aes(x=log2(count+1), fill=tissue_type, y=..density..)) + geom_histogram(binwidth = 0.5) + theme_cowplot(12))
expression distribution acrossed different tissues (freq_plot collapsed by dev_stage)
ggplot(data=melted_collapsed_dev, aes(x=log2(count+1), y=..density.., colour=tissue_type)) + geom_freqpoly(binwidth=0.5) + theme_cowplot(12) + scale_x_continuous(limits=c(0, 20))
Mean-variance
library(ggrepel)
all_counts_mean_var <- all_counts
all_counts_mean_var <- cbind(all_counts_mean_var, mean=apply(all_counts,1,mean))
all_counts_mean_var <- cbind(all_counts_mean_var, variance=apply(all_counts,1,var))
ggplot(data=data.frame(all_counts_mean_var), aes(x=log2(mean+1), y=log2(variance+1))) + geom_point() + theme_cowplot(12) +
geom_text_repel(data=data.frame(all_counts_mean_var) %>% rownames_to_column(var="rowname") %>% slice_max(variance, n=10), aes(x=log2(mean+1), y=log2(variance), label=rowname, colour="red")) +
geom_text_repel(data=data.frame(all_counts_mean_var) %>% rownames_to_column(var="rowname") %>% slice_max(mean, n=10), aes(x=log2(mean+1), y=log2(variance), label=rowname, colour="blue")) + scale_colour_discrete(name = "Top elements", labels = c("Variance", "Mean"))
Mean and variance all together but hue by organism type
collapsed_matrices <- data.frame(mean=double(), variance=double(), tissue=character(), row=character())
colnames(collapsed_matrices) <- c("mean", "variance", "tissue")
for (tissue in tissue_types){
tissue_subset_ids <- meta %>% filter(tissue_type == tissue) %>% pull(id)
all_means <- apply(all_counts[,tissue_subset_ids],1, mean)
all_variance <- apply(all_counts[,tissue_subset_ids],1, var)
temp_matrix <- data.frame(mean=all_means, variance=all_variance, tissue) %>% rownames_to_column("rowname")
collapsed_matrices <- rbind(collapsed_matrices, temp_matrix)
}
ggplot(data=collapsed_matrices, aes(x=log2(mean+1), y=log2(variance+1), colour=tissue)) + geom_point()+ theme_cowplot(12) +
geom_text_repel(data=collapsed_matrices %>% slice_max(variance+mean, n=15), aes(x=log2(mean+1), y=log2(variance+1), label=rowname, colour=tissue))
#geom_text_repel(data=collapsed_matrices %>% rownames_to_column("row") %>% slice_max(mean, n=10), aes(x=log2(mean+1), y=log2(variance+1), label=row, colour=tissue))
#scale_colour_discrete(name = "Top elements", labels = c("Variance", "Mean"))
Variability across tissue types
ggplot(data=collapsed_matrices, aes(x=log2(mean+1), y=log2(variance+1))) + geom_point() + facet_wrap(~ tissue, ncol=3) + theme_cowplot(12)
Zooming in on highly epxressed gene (>1025)
ggplot(data=collapsed_matrices %>% filter(log2(mean+1) >=10), aes(x=log2(mean+1), y=log2(variance+1))) + geom_point() + facet_wrap(~ tissue, ncol=3) + theme_cowplot(12)
Distribution of the TFs
TFCounts <- data.frame(all_counts[TFs,])
TFCounts <- TFCounts %>% rownames_to_column("rowname")
TFCounts <- TFCounts %>% gather("id", "counts", -rowname)
TFCounts <- TFCounts %>% left_join((meta %>% select(tissue_type, dev_stage, id)), by="id")
ggplot(data=TFCounts, aes(x=counts)) + geom_histogram(bins = 100) + facet_grid(~ rowname) + theme_cowplot(12)
Distribution of the TFs (Log transformed)
TFCounts <- data.frame(all_counts[TFs,])
TFCounts <- TFCounts %>% rownames_to_column("rowname")
TFCounts <- TFCounts %>% gather("id", "counts", -rowname)
TFCounts <- TFCounts %>% left_join((meta %>% select(tissue_type, dev_stage, id)), by="id")
ggplot(data=TFCounts, aes(x=log2(counts+1))) + geom_histogram(bins = 100) + facet_grid(~ rowname) + theme_cowplot(12)
Distribution of the TFs (Log transformed)
TFCounts <- data.frame(all_counts[TFs,])
TFCounts <- TFCounts %>% rownames_to_column("rowname")
TFCounts <- TFCounts %>% gather("id", "counts", -rowname)
TFCounts <- TFCounts %>% left_join((meta %>% select(tissue_type, dev_stage, id)), by="id")
ggplot(data=collapsed_matrices %>% filter(rowname %in% TFs), aes(x=log2(mean+1), y=log2(variance+1), colour=tissue)) + geom_point()+ theme_cowplot(12) +
geom_text_repel(data=collapsed_matrices %>% filter(rowname %in% TFs) %>% slice_max(variance+mean, n=15), aes(x=log2(mean+1), y=log2(variance+1), label=rowname, colour=tissue))
Box plots for TFs across tissues
Tcf4Counts <- TFCounts %>% filter(rowname=="Tcf4")
ggplot(data=TFCounts, aes(y=counts, x=tissue_type)) + geom_boxplot() + theme(axis.text.x = element_text(size = 12, angle = 45, hjust = 1)) + facet_wrap(~ rowname, scales = "free") + geom_hline(yintercept=mean(TFCounts$counts), colour="red")
Tcf4 seems to be involved in neuronal differentiation so it makes sense for it to be highly expressed in the brains/neural tube but no so much in liver Some interesting observation, Mecp2 is basically not epxressed at all same as Runx1(except in thymus), and Tcf4 is one of the most active
Expression across dev_stage and tissues
ggplot(data=TFCounts, aes(x=dev_stage, y=counts, colour=rowname)) + geom_point() + theme(axis.text.x = element_text(size = 12, angle = 45, hjust = 1)) + facet_wrap(~ tissue_type, scales = "free") + scale_colour_discrete(name = "Gene")
Expression across dev_stage and tissues (log transformed)
ggplot(data=TFCounts, aes(x=dev_stage, y=log2(counts+1), colour=rowname)) + geom_point() + theme(axis.text.x = element_text(size = 12, angle = 45, hjust = 1)) + facet_wrap(~ tissue_type) + scale_colour_discrete(name = "Gene")
histograms of expresssion distribution by tissues
ggplot(data=TFCounts, aes(x=log2(counts+1), fill=rowname)) + geom_histogram(bins=100) + theme(axis.text.x = element_text(size = 12, angle = 45, hjust = 1)) + facet_wrap(~ tissue_type, ncol=3,scales = "free") + scale_colour_discrete(name = "Gene")
cluster correlation heatmap
library(gplots)
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
library(RColorBrewer)
# Get some nicer colours
mypalette <- brewer.pal(11, "RdYlBu")
# http://colorbrewer2.org/#type=sequential&scheme=BuGn&n=3
morecols <- colorRampPalette(mypalette)
# Set up colour vector for celltype variable
col.cell <- (c("purple","orange")[meta$tissue_type])[1:(length(meta$tissue_type)/2)]
# Plot the
avg_counts_tissue_name <- avg_counts
colnames(avg_counts_tissue_name) <- sapply(colnames(avg_counts), function(x){temp_row <- meta %>% filter(id==x)
return(paste0(temp_row %>% pull(tissue_type), " ",temp_row %>% pull(dev_stage)))})
heatmap.2(log2(avg_counts_tissue_name+1)[(order(apply(avg_counts,1,var), decreasing = TRUE))[1:50],],
col=rev(morecols(50)),
trace="column",
main="Top 50 most variable genes across samples",
ColSideColors=col.cell,
scale = "row")
cluster correlation heatmap (only TFs)
library(gplots)
library(RColorBrewer)
# Get some nicer colours
mypalette <- brewer.pal(11, "RdYlBu")
# http://colorbrewer2.org/#type=sequential&scheme=BuGn&n=3
morecols <- colorRampPalette(mypalette)
# Set up colour vector for celltype variable
col.cell <- (c("purple","orange")[meta$tissue_type])[1:(length(meta$tissue_type)/2)]
# Plot the heatmap
avg_counts_tissue_name <- avg_counts
colnames(avg_counts_tissue_name) <- sapply(colnames(avg_counts), function(x){temp_row <- meta %>% filter(id==x)
return(paste0(temp_row %>% pull(tissue_type), " ",temp_row %>% pull(dev_stage)))})
heatmap.2(log2(avg_counts_tissue_name+1)[TFs,],
col=rev(morecols(length(TFs))),
trace="column",
main="TF count heat map across tissues",
ColSideColors=col.cell,
scale="row")
Midbrain cluster correlation heatmap (only TFs)
# Set up colour vector for celltype variable
col.cell <- (c("purple","orange")[meta$tissue_type])[1:(length(meta %>% filter(tissue_type=="midbrain") %>% pull(id)))]
# Plot the heatmap
midbrain_id <- meta %>% filter(tissue_type == "midbrain") %>% pull(id)
midbrain_counts <- all_counts[,midbrain_id]
colnames(midbrain_counts) <- sapply(colnames(midbrain_counts), function(x){temp_row <- meta %>% filter(id==x)
return(paste0(temp_row %>% pull(dev_stage)))})
heatmap.2(log2(midbrain_counts+1)[TFs,],
col=rev(morecols(length(TFs))),
trace="column",
main="TF count heat map across Midbrain",
ColSideColors=col.cell,
scale="row")
Hindbrain cluster correlation heatmap (only TFs)
# Set up colour vector for celltype variable
col.cell <- (c("purple","orange")[meta$tissue_type])[1:(length(meta %>% filter(tissue_type=="hindbrain") %>% pull(id)))]
# Plot the heatmap
hindbrain_id <- meta %>% filter(tissue_type == "hindbrain") %>% pull(id)
hindbrain_counts <- all_counts[,hindbrain_id]
colnames(hindbrain_counts) <- sapply(colnames(hindbrain_counts), function(x){temp_row <- meta %>% filter(id==x)
return(paste0(temp_row %>% pull(dev_stage)))})
heatmap.2(log2(hindbrain_counts+1)[TFs,],
col=rev(morecols(length(TFs))),
trace="column",
main="TF count heat map across Hindbrain",
ColSideColors=col.cell,
scale="row")
variability (across tissues, within tissues as well) mean-variance is the ranking similar across tissues
##Remove zero variance
all_counts_drop_var <- (t(all_counts))[, which(apply(t(all_counts),2,var) !=0)]
all_counts.pca <- prcomp(all_counts_drop_var, center = TRUE,scale. = TRUE)
library(ggfortify)
autoplot(all_counts.pca, data=meta, colour="dev_stage") + theme_cowplot(12)
variability (across tissues, within tissues as well) mean-variance is the ranking similar across tissues
##Remove zero variance
all_counts_drop_var <- (t(all_counts))[, which(apply(t(all_counts),2,var) !=0)]
all_counts.pca <- prcomp(all_counts_drop_var, center = TRUE,scale. = TRUE)
library(ggfortify)
autoplot(all_counts.pca, data=meta, colour="tissue_type") + theme_cowplot(12)